true

Spatial Interpolation: meuse

Data

library(sp)
library(gstat)
library(sf)
library(mapview)
library(tidyverse)
data(meuse)
head(meuse)
##        x      y cadmium copper lead zinc  elev       dist   om ffreq soil lime
## 1 181072 333611    11.7     85  299 1022 7.909 0.00135803 13.6     1    1    1
## 2 181025 333558     8.6     81  277 1141 6.983 0.01222430 14.0     1    1    1
## 3 181165 333537     6.5     68  199  640 7.800 0.10302900 13.0     1    1    1
## 4 181298 333484     2.6     81  116  257 7.655 0.19009400  8.0     1    2    0
## 5 181307 333330     2.8     48  117  269 7.480 0.27709000  8.7     1    2    0
## 6 181390 333260     3.0     61  137  281 7.791 0.36406700  7.8     1    2    0
##   landuse dist.m
## 1      Ah     50
## 2      Ah     30
## 3      Ah    150
## 4      Ga    270
## 5      Ah    380
## 6      Ga    470
ggplot(data = meuse) + geom_point(aes(x, y))

data(meuse.grid)
head(meuse.grid)
##        x      y part.a part.b      dist soil ffreq
## 1 181180 333740      1      0 0.0000000    1     1
## 2 181140 333700      1      0 0.0000000    1     1
## 3 181180 333700      1      0 0.0122243    1     1
## 4 181220 333700      1      0 0.0434678    1     1
## 5 181100 333660      1      0 0.0000000    1     1
## 6 181140 333660      1      0 0.0122243    1     1
ggplot(data = meuse.grid) + geom_point(aes(x, y))

Summary Statistics

summary(meuse)
##        x                y             cadmium           copper      
##  Min.   :178605   Min.   :329714   Min.   : 0.200   Min.   : 14.00  
##  1st Qu.:179371   1st Qu.:330762   1st Qu.: 0.800   1st Qu.: 23.00  
##  Median :179991   Median :331633   Median : 2.100   Median : 31.00  
##  Mean   :180005   Mean   :331635   Mean   : 3.246   Mean   : 40.32  
##  3rd Qu.:180630   3rd Qu.:332463   3rd Qu.: 3.850   3rd Qu.: 49.50  
##  Max.   :181390   Max.   :333611   Max.   :18.100   Max.   :128.00  
##                                                                     
##       lead            zinc             elev             dist        
##  Min.   : 37.0   Min.   : 113.0   Min.   : 5.180   Min.   :0.00000  
##  1st Qu.: 72.5   1st Qu.: 198.0   1st Qu.: 7.546   1st Qu.:0.07569  
##  Median :123.0   Median : 326.0   Median : 8.180   Median :0.21184  
##  Mean   :153.4   Mean   : 469.7   Mean   : 8.165   Mean   :0.24002  
##  3rd Qu.:207.0   3rd Qu.: 674.5   3rd Qu.: 8.955   3rd Qu.:0.36407  
##  Max.   :654.0   Max.   :1839.0   Max.   :10.520   Max.   :0.88039  
##                                                                     
##        om         ffreq  soil   lime       landuse       dist.m      
##  Min.   : 1.000   1:84   1:97   0:111   W      :50   Min.   :  10.0  
##  1st Qu.: 5.300   2:48   2:46   1: 44   Ah     :39   1st Qu.:  80.0  
##  Median : 6.900   3:23   3:12           Am     :22   Median : 270.0  
##  Mean   : 7.478                         Fw     :10   Mean   : 290.3  
##  3rd Qu.: 9.000                         Ab     : 8   3rd Qu.: 450.0  
##  Max.   :17.000                         (Other):25   Max.   :1000.0  
##  NA's   :2                              NA's   : 1

Bubble plot

ggplot(data = meuse) + geom_point(aes(x, y, size=zinc))

Spatial Data Wrangling

class(meuse)
## [1] "data.frame"
class(meuse.grid)
## [1] "data.frame"

Convert the meuse and meuse.grid data frames to sf objects using the st_as_sf()

meuse2 <- st_as_sf(meuse, coords = c("x", "y"), crs = 28992)
meuse.grid2 <- st_as_sf(meuse.grid, coords = c("x", "y"),
                       crs = 28992)

Recheck class type

class(meuse2)
## [1] "sf"         "data.frame"
class(meuse.grid2)
## [1] "sf"         "data.frame"
head(meuse2)
## Simple feature collection with 6 features and 12 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181025 ymin: 333260 xmax: 181390 ymax: 333611
## Projected CRS: Amersfoort / RD New
##   cadmium copper lead zinc  elev       dist   om ffreq soil lime landuse dist.m
## 1    11.7     85  299 1022 7.909 0.00135803 13.6     1    1    1      Ah     50
## 2     8.6     81  277 1141 6.983 0.01222430 14.0     1    1    1      Ah     30
## 3     6.5     68  199  640 7.800 0.10302900 13.0     1    1    1      Ah    150
## 4     2.6     81  116  257 7.655 0.19009400  8.0     1    2    0      Ga    270
## 5     2.8     48  117  269 7.480 0.27709000  8.7     1    2    0      Ah    380
## 6     3.0     61  137  281 7.791 0.36406700  7.8     1    2    0      Ga    470
##                geometry
## 1 POINT (181072 333611)
## 2 POINT (181025 333558)
## 3 POINT (181165 333537)
## 4 POINT (181298 333484)
## 5 POINT (181307 333330)
## 6 POINT (181390 333260)
head(meuse.grid2)
## Simple feature collection with 6 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   part.a part.b      dist soil ffreq              geometry
## 1      1      0 0.0000000    1     1 POINT (181180 333740)
## 2      1      0 0.0000000    1     1 POINT (181140 333700)
## 3      1      0 0.0122243    1     1 POINT (181180 333700)
## 4      1      0 0.0434678    1     1 POINT (181220 333700)
## 5      1      0 0.0000000    1     1 POINT (181100 333660)
## 6      1      0 0.0122243    1     1 POINT (181140 333660)

Interactive maps

mapview(meuse2, zcol = "zinc",  map.types = "CartoDB.Voyager")
mapview(meuse.grid2,  map.types = "CartoDB.Voyager")

EDA

ggplot(meuse, aes(x=dist, y=zinc)) + geom_point()

ggplot(meuse, aes(x=dist, y=log(zinc))) + geom_point()

ggplot(meuse, aes(x=sqrt(dist), y=log(zinc))) + geom_point()

library(GGally)
ggpairs(meuse)

ggpairs(meuse[,3:9])
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_density()`).

Simple Linear Regression Model

lm <- lm(log(zinc)~sqrt(dist), meuse)
meuse$residuals <- residuals(lm)
meuse$fitted <- fitted(lm) 
meuse$fitted2 <- predict(lm, meuse) 
meuse$fitted.s <- predict(lm, meuse) - mean(predict(lm, meuse))
head(meuse)
##        x      y cadmium copper lead zinc  elev       dist   om ffreq soil lime
## 1 181072 333611    11.7     85  299 1022 7.909 0.00135803 13.6     1    1    1
## 2 181025 333558     8.6     81  277 1141 6.983 0.01222430 14.0     1    1    1
## 3 181165 333537     6.5     68  199  640 7.800 0.10302900 13.0     1    1    1
## 4 181298 333484     2.6     81  116  257 7.655 0.19009400  8.0     1    2    0
## 5 181307 333330     2.8     48  117  269 7.480 0.27709000  8.7     1    2    0
## 6 181390 333260     3.0     61  137  281 7.791 0.36406700  7.8     1    2    0
##   landuse dist.m   residuals   fitted  fitted2     fitted.s
## 1      Ah     50  0.02907908 6.900438 6.900438  1.014661840
## 2      Ah     30  0.32712956 6.712531 6.712531  0.826754936
## 3      Ah    150  0.28533439 6.176134 6.176134  0.290357936
## 4      Ga    270 -0.33385786 5.882934 5.882934 -0.002841905
## 5      Ah    380 -0.05778586 5.652497 5.652497 -0.233278608
## 6      Ga    470  0.18211082 5.456244 5.456244 -0.429532005
library(viridis)
## Loading required package: viridisLite
ggplot(meuse, aes(x=x, y=y, col=fitted))+geom_point()+scale_color_viridis()

ggplot(meuse, aes(x=x, y=y, col=residuals))+geom_point()+scale_color_viridis()

ggplot(meuse, aes(x=x, y=y, col=fitted.s))+geom_point()+scale_color_viridis()

Inverse Distance Interpolation

library(stars) |> suppressPackageStartupMessages()
library(gstat)
idw_result <- idw(zinc~1, meuse2, meuse.grid2)
## [inverse distance weighted interpolation]
idw_result
## Simple feature collection with 3103 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 178460 ymin: 329620 xmax: 181540 ymax: 333740
## Projected CRS: Amersfoort / RD New
## First 10 features:
##    var1.pred var1.var              geometry
## 1   633.6864       NA POINT (181180 333740)
## 2   712.5450       NA POINT (181140 333700)
## 3   654.1617       NA POINT (181180 333700)
## 4   604.4422       NA POINT (181220 333700)
## 5   857.2558       NA POINT (181100 333660)
## 6   755.5061       NA POINT (181140 333660)
## 7   667.7526       NA POINT (181180 333660)
## 8   604.4254       NA POINT (181220 333660)
## 9  1003.8711       NA POINT (181060 333620)
## 10  945.0017       NA POINT (181100 333620)
# Convert IDW output to a data frame
idw_df <- as.data.frame(idw_result)
head(idw_df)
##   var1.pred var1.var              geometry
## 1  633.6864       NA POINT (181180 333740)
## 2  712.5450       NA POINT (181140 333700)
## 3  654.1617       NA POINT (181180 333700)
## 4  604.4422       NA POINT (181220 333700)
## 5  857.2558       NA POINT (181100 333660)
## 6  755.5061       NA POINT (181140 333660)
colnames(idw_df) <- c("x", "y", "zinc_pred")
head(idw_df)
##          x  y             zinc_pred
## 1 633.6864 NA POINT (181180 333740)
## 2 712.5450 NA POINT (181140 333700)
## 3 654.1617 NA POINT (181180 333700)
## 4 604.4422 NA POINT (181220 333700)
## 5 857.2558 NA POINT (181100 333660)
## 6 755.5061 NA POINT (181140 333660)

Visualize IDW values

ggplot(data = meuse2) + geom_sf(data = idw_result, aes(color = var1.pred)) +
  geom_sf() +
  scale_color_viridis() + theme_bw()

Spatial Kriging

Lagged Scatter plots

hscat(log(zinc)~1, meuse2, (0:9)*100)

Variogram Cloud

vc <- variogram(log(zinc) ~ 1, meuse2, cloud = TRUE)
plot(vc)

Binned Variogram/ Sample Variogram

v <- variogram(log(zinc) ~ 1, data = meuse2)
plot(v)

List of available models for variograms

vgm()
##    short                                      long
## 1    Nug                              Nug (nugget)
## 2    Exp                         Exp (exponential)
## 3    Sph                           Sph (spherical)
## 4    Gau                            Gau (gaussian)
## 5    Exc        Exclass (Exponential class/stable)
## 6    Mat                              Mat (Matern)
## 7    Ste Mat (Matern, M. Stein's parameterization)
## 8    Cir                            Cir (circular)
## 9    Lin                              Lin (linear)
## 10   Bes                              Bes (bessel)
## 11   Pen                      Pen (pentaspherical)
## 12   Per                            Per (periodic)
## 13   Wav                                Wav (wave)
## 14   Hol                                Hol (hole)
## 15   Log                         Log (logarithmic)
## 16   Pow                               Pow (power)
## 17   Spl                              Spl (spline)
## 18   Leg                            Leg (Legendre)
## 19   Err                   Err (Measurement error)
## 20   Int                           Int (Intercept)
show.vgms(par.strip.text = list(cex = 0.75))

Fitting Variogram Models

Trial 1

Assess our initial model

vg.fit1 <- vgm(psill = 0.5, model = "Sph",
                range = 900, nugget = 0.1)
vg.fit1 
##   model psill range
## 1   Nug   0.1     0
## 2   Sph   0.5   900
plot(v, vg.fit1 , cutoff = 1000, cex = 1.5)

Fit a variogram providing the sample variogram

fv <- fit.variogram(object = v,
                    model = vgm(psill = 0.5, model = "Sph",
                                range = 900, nugget = 0.1))
fv
##   model      psill    range
## 1   Nug 0.05066017   0.0000
## 2   Sph 0.59060556 897.0044
attr(fv, "SSErr")
## [1] 9.011194e-06
plot(v, fv, cex = 1.5)

Trial 2

fv2 <- fit.variogram(object = v,
                    model = vgm(psill = 0.6, model = "Sph",range = 900, nugget = 0.1))
fv2
##   model      psill    range
## 1   Nug 0.05066522   0.0000
## 2   Sph 0.59061054 897.0412
attr(fv2, "SSErr")
## [1] 9.011195e-06
plot(v, fv2, cex = 1.5)

Trial 3

fv3 <- fit.variogram(object = v,
                    model = vgm(psill = 1, model = "Exp",range = 800, nugget = 0.1))
fv3
##   model     psill    range
## 1   Nug 0.0000000   0.0000
## 2   Exp 0.7186519 449.7572
attr(fv3, "SSErr")
## [1] 1.628328e-05
plot(v, fv3, cex = 1.5)

If the process is isotropic we can directly go for kriging

library(ggplot2)
library(viridis)

k <- gstat(formula = log(zinc) ~ 1, data = meuse2, model = fv)
kpred <- predict(k, meuse.grid2)
## [using ordinary kriging]
head(kpred)
## Simple feature collection with 6 features and 2 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 181100 ymin: 333660 xmax: 181220 ymax: 333740
## Projected CRS: Amersfoort / RD New
##   var1.pred  var1.var              geometry
## 1  6.499619 0.3198083 POINT (181180 333740)
## 2  6.622352 0.2520197 POINT (181140 333700)
## 3  6.505162 0.2729850 POINT (181180 333700)
## 4  6.387586 0.2955288 POINT (181220 333700)
## 5  6.764491 0.1779405 POINT (181100 333660)
## 6  6.635511 0.2022032 POINT (181140 333660)

Visualisation of Predictions

ggplot() + geom_sf(data = kpred, aes(color = var1.pred)) +
  geom_sf(data = meuse2) +
  scale_color_viridis(name = "log(zinc)") + theme_bw()

ggplot() + geom_sf(data = kpred, aes(color = var1.var)) +
  geom_sf(data = meuse2) +
  scale_color_viridis(name = "variance") + theme_bw()

Detecting Anisotropy

vg.dir <- variogram(log(zinc)~1, alpha = c(0, 45, 90, 135), meuse2)
plot(vg.dir)

Fit Anisotropic Variogram Models

The function call vgm(psill=0.59, model = “Sph”, range = 1200, nugget = 0.05, anis = c(45, 0.4)) in R creates a variogram model using the vgm() function from the gstat package. Here’s what each parameter means:

  • psill = 0.59: This is the partial sill, representing the difference between the sill (total variance) and the nugget effect. It defines the maximum semivariance minus the nugget.

  • model = “Sph”: The model type is Spherical, one of the commonly used variogram models. It describes how spatial correlation decreases with distance.

  • range = 1200: This is the range, the distance at which the spatial correlation drops to near zero (or 95% of the sill for the spherical model). Beyond this distance, observations are considered uncorrelated.

  • nugget = 0.05: The nugget effect, representing spatial variability at very small scales or measurement errors. It accounts for discontinuity at very short distances.

  • anis = c(45, 0.4): Specifies anisotropy, meaning the spatial correlation differs by direction.

  • 45 indicates the direction (angle in degrees from the x-axis) where the correlation is strongest.

  • 0.4 is the anisotropy ratio, meaning correlation decays faster in the perpendicular direction. A value of 0.4 means that in the perpendicular direction, the range is only 40% of the main direction’s range.

fit.ani <- vgm(psill=0.59, model = "Sph", range = 1200, nugget = 0.05, anis = c(45, 0.4))
plot(vg.dir, fit.ani)

This variogram model describes spatial dependence using a spherical variogram with anisotropy. The correlation extends up to 1200 units in the primary direction (45°), but only 480 units (1200 × 0.4) in the perpendicular direction. There is a small nugget effect (0.05), indicating slight short-scale variation or measurement noise.

Variogram Maps

Another way to detect directional dependence

vg.map <- variogram(log(zinc)~1, meuse2, map=T, cutoff=1000, width=100)
plot(vg.map, threshold=5)

Explanation of the Code: vg.map <- variogram(log(zinc) ~ 1, meuse, map = TRUE, cutoff = 1000, width = 100)

This code computes and visualizes the variogram map of the log-transformed zinc concentrations in the Meuse dataset.

  1. This function calculates a directional semivariogram for the spatial data.
  • variogram(log(zinc) ~ 1, meuse, …)

    Computes the experimental semivariogram for log-transformed zinc concentrations. The ~ 1 formula means it calculates the variogram without considering covariates (i.e., only based on spatial distances).
  • map = TRUE

    Instead of returning a traditional one-dimensional variogram, this creates a spatial variogram map, which helps identify anisotropy (directional dependence of spatial variation).

  • cutoff = 1000

    Defines the maximum lag distance (1,000 meters). Pairs of points separated by distances greater than this are ignored.

  • width = 100

    Sets the bin width for grouping distances. Distance intervals of 100 meters are used when computing the variogram.

  1. Breakdown of plot(vg.map, threshold = 5) This function plots the variogram map.
  • plot(vg.map, …)

    Plots the spatial variogram map, where each pixel represents a directional semivariance value for a given spatial lag.

  • threshold = 5

    Sets a threshold to filter high semivariance values to make the plot more interpretable.

Model residuals obtain from log(zinc)~sqrt(dist)

vgreg.est <- variogram(log(zinc)~sqrt(dist), meuse2)
plot(vgreg.est)

vgreg.fit <- fit.variogram(vgreg.est, vgm(0.2, "Sph", 800, 0.05))
vgreg.fit
##   model      psill    range
## 1   Nug 0.07978675   0.0000
## 2   Sph 0.14902954 871.8744
plot(vgreg.est, vgreg.fit, cex = 1.5)

vgreg.dir <- variogram(log(zinc)~sqrt(dist), meuse2, alpha=c(0, 45, 90, 135))
plot(vgreg.dir)

plot(vgreg.dir, vgreg.fit, cex = 1.5)

The directional dependence is much less obvious in this case.

Another way to show this is variogram maps

vgreg.map <- variogram(log(zinc)~sqrt(dist), meuse2, map=T, cutoff=1000, width=100)
plot(vgreg.map, threshold=5)